#!/bin/bash

# Optional configuration:
PGBINDIR=${PGBINDIR:-"/var/lib/postgresql/dbs/postgresql-HEAD/bin"}
PGCWORKDIR=${PGCWORKDIR:-"/tmp"}
PSQL="${PGBINDIR}/psql --pset pager=off "
PGCOMITSCHEMAS=${PGCOMITSCHEMAS:-"('pg_catalog'),('information_schema')"}

### Capture default URI based on what's in the environment
###  Use PGHOST/PGUSER/PGPORT if assigned

PGDB=${PGDB:-"comparisondatabase"}
PGHOST=${PGHOST:-"localhost"}
PGUSER=${PGUSER:-"postgres"}
PGPORT=${PGPORT:-"5432"}
URIC="postgresql://${PGUSER}@${PGHOST}:${PGPORT}/${PGDB}"
PGURI=${PGURI:-${URIC}}

PGCLABEL1=${PGCLABEL1:-"db1"}
PGCLABEL2=${PGCLABEL2:-"db2"}

PGCEXPLANATIONS=${PGCEXPLANATIONS:-"${PGCWORKDIR}/explanations.txt"}
PGCFULLOUTPUT=${PGCFULLOUTPUT:-"${PGCWORKDIR}/fulloutput.txt"}
PGCUNEXPLAINED=${PGCUNEXPLAINED:-"${PGCWORKDIR}/unexplained.txt"}
PGCBADEXPLAIN=${PGCBADEXPLAIN:-"${PGCWORKDIR}/badexplanations.txt"}
PGCMPINPUT1=${PGCMPINPUT1:-${PGCWORKDIR}/${PGCLABEL1}.copy}
PGCMPINPUT2=${PGCMPINPUT2:-${PGCWORKDIR}/${PGCLABEL2}.copy}
PGCMPIGNORESLONYTRIGGERS=${PGCMPIGNORESLONYTRIGGERS:-"false"}

touch $PGCEXPLANATIONS

echo "pgcmp: Comparing schema data

 Labels:
   PGCLABEL1=[${PGCLABEL1}]
   PGCLABEL2=[${PGCLABEL2}]

 Data files:
   PGCMPINPUT1=[${PGCMPINPUT1}]
   PGCMPINPUT2=[${PGCMPINPUT2}]

 Omit schemas:
   PGCOMITSCHEMAS=[${PGCOMITSCHEMAS}]
   PGCMPIGNORESLONYTRIGGERS=[${PGCMPIGNORESLONYTRIGGERS}]

 Work Database:
   PGURI - ${PGURI}

Explanations Input list (PGCEXPLANATIONS): [${PGCEXPLANATIONS}]
Full output: (PGCFULLOUTPUT) [${PGCFULLOUTPUT}]
Unexplained items output: (PGCUNEXPLAINED) [${PGCUNEXPLAINED}]
Unexplained items as explanation: (PGCBADEXPLAIN) [${PGCBADEXPLAIN}]
Schemas to omit: (PGCOMITSCHEMAS) [${PGCOMITSCHEMAS}]
Should we ignore Slony-I triggers? (PGCMPIGNORESLONYTRIGGERS) [${PGCMPIGNORESLONYTRIGGERS}]

Expanded command line: [PGCMPINPUT1=${PGCMPINPUT1} PGCMPINPUT2=${PGCMPINPUT2} PGCLABEL1=${PGCLABEL1} PGCLABEL2=${PGCLABEL2}  PGURI=${PGURI} PGCEXPLANATIONS=${PGCEXPLANATIONS} PGCFULLOUTPUT=${PGCFULLOUTPUT} PGCBADEXPLAIN=${PGCBADEXPLAIN} PGCOMITSCHEMAS=\"${PGCOMITSCHEMAS}\" PGCMPIGNORESLONYTRIGGERS=${PGCMPIGNORESLONYTRIGGERS} pgcmp]
"

function compare_data () {

    COMPARESCRIPT="
drop table if exists data_raw, schemas_to_ignore, raw_differences, expected_differences, object_types, fulloutput, slony_triggered_tables, slony_triggers, unexplained_items, badexplanations_items cascade;
create table data_raw (
    label text,
    id serial,
    primary key (id),
    object_schema text,
    object_type text,
    object_name text,
    object_definition text
);
comment on column data_raw.label is 'Label indicating data source';
comment on column data_raw.id is 'unique value to help construct primary key';
comment on column data_raw.object_schema is 'namespace (pg_namespace) containing the object';
comment on column data_raw.object_type is 'type of object';
comment on column data_raw.object_name is 'fully qualified name of the object';
comment on column data_raw.object_definition is 'definition of the object';

create index db1_tnd on data_raw(object_type, object_name);
truncate data_raw;

\\copy data_raw (label, object_schema, object_type, object_name, object_definition) from '${PGCMPINPUT1}';
\\copy data_raw (label, object_schema, object_type, object_name, object_definition) from '${PGCMPINPUT2}';

drop table if exists schemas_to_ignore;
create table schemas_to_ignore (
  schema_name text primary key,
  schema_name_quoted text
);

insert into schemas_to_ignore (schema_name)
  values ${PGCOMITSCHEMAS};

update schemas_to_ignore
  set schema_name_quoted = quote_ident(schema_name);
create index stig_quoted on schemas_to_ignore(schema_name_quoted);

analyze schemas_to_ignore;

update data_raw set object_definition = '' where object_definition is null;

drop table if exists raw_differences cascade;
create table raw_differences (
  object_schema text,
  object_name text,
  object_type text,
  label1 text,
  id1 integer references data_raw(id),
  object_definition1 text,
  label2 text, 
  id2 integer references data_raw(id),
  object_definition2 text,
  difference_type text,
  importance integer
);

comment on column raw_differences.object_schema is 'namespace containing the object';
comment on column raw_differences.object_name is 'fully qualified object name';
comment on column raw_differences.object_type is 'type of object';
comment on column raw_differences.label1 is 'Label of 1st data source';
comment on column raw_differences.id1 is 'ID of raw data in 1st data source';
comment on column raw_differences.object_definition1 is 'Definition of object in 1st data source';
comment on column raw_differences.label2 is 'Label of 2nd data source';
comment on column raw_differences.id2 is 'ID of raw data in 2nd data source';
comment on column raw_differences.object_definition2 is 'Definition of object in 2nd data source';
comment on column raw_differences.difference_type is 'type of difference recognized';
comment on column raw_differences.importance is 'Priority of difference - low values are most important';

--- Find all the items that generally match...
insert into raw_differences (label1, id1, object_type, object_schema, object_name, object_definition1, 
                             label2, id2, object_definition2, difference_type)
select r1.label, r1.id, r1.object_type, r1.object_schema, r1.object_name, r1.object_definition,
       r2.label, r2.id, r2.object_definition,
       case when r1.object_definition = r2.object_definition then 'match'
            else 'mismatch' end
from data_raw r1, data_raw r2
where r1.label = '${PGCLABEL1}' and
      r2.label = '${PGCLABEL2}' and
      r1.object_type = r2.object_type and
      r1.object_name = r2.object_name;

insert into raw_differences (label1, id1, object_type, object_schema, object_name, object_definition1, difference_type)
select r1.label, r1.id, r1.object_type, r1.object_schema, r1.object_name, r1.object_definition, 'missing in 2nd DB'
from data_raw r1
where not exists (select 1 from data_raw r2 where
                 r1.object_type = r2.object_type and
                 r1.object_name = r2.object_name);

insert into raw_differences (label1, id1, object_type, object_schema, object_name, object_definition1, difference_type)
select r1.label, r1.id, r1.object_type, r1.object_schema, r1.object_name, r1.object_definition, 'missing in 2nd DB'
from data_raw r1
where 
     r1.label = '${PGCLABEL1}' and
     not exists (select 1 from data_raw r2 where
                 r1.object_type = r2.object_type and
                 r1.object_name = r2.object_name and r2.label <> r1.label)
union all
select r1.label, r1.id, r1.object_type, r1.object_schema, r1.object_name, r1.object_definition, 'missing in 1st DB'
from data_raw r1
where 
     r1.label = '${PGCLABEL2}' and
     not exists (select 1 from data_raw r2 where
                 r1.object_type = r2.object_type and
                 r1.object_name = r2.object_name and r2.label <> r1.label);

create index rd_label1 on raw_differences(label1);
create index rd_id1 on raw_differences(id1);
create index rd_type on raw_differences(object_type);
create index rd_name on raw_differences(object_name);
create index rd_schema on raw_differences(object_schema);
create index rd_label2 on raw_differences(label2);
create index rd_id2 on raw_differences(id2);

delete from raw_differences
  where object_schema in (select schema_name from schemas_to_ignore);

delete from raw_differences
  where object_schema in (select schema_name_quoted from schemas_to_ignore);

update raw_differences 
  set importance = NULL
  where difference_type = 'match';

update raw_differences 
  set importance = 1
  where difference_type = 'mismatch';

update raw_differences 
  set importance = 2
  where difference_type like 'missing in % DB';

-- If schema is missing, then bump up all importances for all objects
-- in that schema to indicate diminished importance
update raw_differences
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_schema in (select object_schema from raw_differences where difference_type like 'missing in %DB' and object_type = 'schema')
        and object_type <> 'schema' ;

-- If table is missing, then bump up importances for subservient
-- objects

update raw_differences rd
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_type in ('table owner', 'table permissions', 'trigger', 'column', 'foreign key', 'index') and exists
        (select 1 from raw_differences rdt where rdt.difference_type like 'missing in %DB' and
                       rdt.object_type = 'table' and rdt.object_schema = rd.object_schema and
                       (rd.object_name = rdt.object_name or rd.object_name like (rdt.object_name || '.%')));

update raw_differences rd
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_type in ('column', 'view permissions', 'view owner', 'view definition') and exists
        (select 1 from raw_differences rdt where rdt.difference_type like 'missing in %DB' and
                       rdt.object_type = 'view' and rdt.object_schema = rd.object_schema and
                       (rd.object_name = rdt.object_name or rd.object_name like (rdt.object_name || '.%')));

update raw_differences rd
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_type in ('sequence permissions') and exists
        (select 1 from raw_differences rdt where rdt.difference_type like 'missing in %DB' and
                       rdt.object_type = 'sequence' and rdt.object_schema = rd.object_schema and
                       (rd.object_name = rdt.object_name or rd.object_name like (rdt.object_name || '.%')));

update raw_differences rd
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_type in ('data type owner') and exists
        (select 1 from raw_differences rdt where rdt.difference_type like 'missing in %DB' and
                       rdt.object_type = 'data type' and rdt.object_schema = rd.object_schema and
                       (rd.object_name = rdt.object_name or rd.object_name like (rdt.object_name || '.%')));

update raw_differences rd
  set importance = importance + 1
  where difference_type like 'missing in %DB' and
        object_type in 
               ('function definition', 'function config', 'function language',
                'function owner', 'function permissions', 'function security type')
         and exists
        (select 1 from raw_differences rdt where rdt.difference_type like 'missing in %DB' and
                       rdt.object_type = 'functiondefinition' and rdt.object_schema = rd.object_schema and
                       (rd.object_name = rdt.object_name or rd.object_name like (rdt.object_name || '.%')));

drop table if exists expected_differences cascade;
create table expected_differences (
    object_type text,
    object_schema text,
    object_name text,
    difference_type text,
    difference_cause text,
    importance integer,
    primary key(object_type, object_name, difference_type, difference_cause)
);

\\copy expected_differences from '${PGCEXPLANATIONS}';


drop table if exists object_types cascade;
create table object_types (
   object_type text primary key,
   description text
);

insert into object_types (object_type, description)
values 
('header', 'placeholder - ignore'),
('index', 'index on table'),
('check constraint', 'CHECK constraint on table'),
('foreign key', 'foreign key constraint'),
('data type', 'data type'),
('data type owner', 'owner of data type'),
('function config', 'stored function configuration (SET values and such)'),
('function definition', 'definition of stored function'),
('function language', 'language of implementation of stored function'),
('function owner', 'who owns this stored function?'),
('function security type', 'stored function SECURITY DEFINER (ala setuid)'),
('function permissions', 'per-role privileges for a stored function'),
('trigger', 'trigger'),
('operator', 'operator'),
('operator owner', 'owner of operator'),
('view definition', 'definition of VIEW'),
('view', 'basic information about VIEW'),
('view owner', 'owner of VIEW'),
('view permissions', 'permissions on VIEW'),
('table definition', 'definition of TABLE'),
('table', 'basic information about TABLE'),
('table owner', 'owner of TABLE'),
('table permissions', 'permissions on TABLE'),
('schema', 'schema/NAMESPACE'),
('schema permissions', 'permissions for schema'),
('sequence', 'sequence object'),
('sequence permissions', 'permissions for sequence object'),
('role', 'role (user/abstract role)'),
('column', 'attribute of table/view/sequence'),
('options', 'table storage options');

drop table if exists fulloutput;
create table fulloutput (
  object_name text,
  object_schema text,
  object_type text references object_types(object_type),
  label1 text,
  object_definition1 text,
  label2 text,
  object_definition2 text,
  difference_type text,
  difference_cause text,
  importance integer
);
create index fo_main on fulloutput(object_name, object_type);

--- Perform full reconciliation

--- 1.   Expected case: perfect match, no explanation needed OR PROVIDED
insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, NULL::text, importance
from raw_differences r where difference_type = 'match' and not exists
(select 1 from expected_differences d where r.object_name = d.object_name and r.object_type = d.object_type);

--- 2.  Not so much expected...  Perfect match, but we have a
---     reconciling entry indicating why there *shouldn't* be a match

insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, d.difference_cause, r.importance
from raw_differences r, expected_differences d where r.difference_type = 'match' and 
r.object_name = d.object_name and r.object_type = d.object_type;

--- 3.  Expected...   missing, and explained...

insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, d.difference_cause, r.importance
from raw_differences r, expected_differences d where r.difference_type in ('missing in 1st DB', 'missing in 2nd DB') and 
r.object_name = d.object_name and r.object_type = d.object_type;

--- 4.  Not expected...   missing, and NOT explained...

insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, NULL::text, r.importance
from raw_differences r where r.difference_type in ('missing in 1st DB', 'missing in 2nd DB') and 
not exists 
(select 1 from expected_differences d where r.object_name = d.object_name and r.object_type = d.object_type);

--- 5.  Expected...   mismatch, and explained...

insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, d.difference_cause, r.importance
from raw_differences r, expected_differences d where r.difference_type in ('mismatch') and 
r.object_name = d.object_name and r.object_type = d.object_type;

--- 6.  Not expected...   missing, and NOT explained...

insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, NULL::text, r.importance
from raw_differences r where r.difference_type in ('mismatch') and 
not exists 
(select 1 from expected_differences d where r.object_name = d.object_name and r.object_type = d.object_type);

--- 7.  Ignore differences arising from omitted schemas
insert into fulloutput (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, difference_cause, importance)
select r.object_name, r.object_schema, r.object_type, r.label1, r.object_definition1, r.label2, r.object_definition2, r.difference_type, NULL::text, r.importance
from raw_differences r where r.difference_type in ('mismatch', 'missing in 1st DB', 'missing in 2nd DB') and exists (select 1 from schemas_to_ignore where schema_name = r.object_schema);

drop table if exists unexplained_items;
create table unexplained_items (
  object_name text,
  object_schema text,
  object_type text,
  label1 text,
  id1 integer,
  object_definition1 text,
  label2 text,
  id2 integer,
  object_definition2 text,
  difference_type text,
  importance integer
);

drop table if exists slony_triggered_tables;
create table slony_triggered_tables (
  schema_name text,
  table_name text
);

insert into slony_triggered_tables (schema_name, table_name)
 select object_schema, object_name
 from data_raw r1
 where object_type = 'table' and
 exists (select 1 from data_raw r2 where
         r1.object_schema = r2.object_schema and
         r2.object_type = 'trigger' and
         (
          (r2.object_name like (r1.object_name || '_%_denyaccess/\"INSERT\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE') or
		  (r2.object_name like (r1.object_name || '_%_denyaccess/\"DELETE\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE') or
		  (r2.object_name like (r1.object_name || '_%_denyaccess/\"UPDATE\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE') or
          (r2.object_name like (r1.object_name || '_%_logtrigger/\"INSERT\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER') or
		  (r2.object_name like (r1.object_name || '_%_logtrigger/\"DELETE\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER') or
		  (r2.object_name like (r1.object_name || '_%_logtrigger/\"UPDATE\"') and
           r2.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER')))
group by 1, 2;

create index stt_st on slony_triggered_tables(schema_name, table_name);

drop table if exists slony_triggers;
create table slony_triggers (schema_name text, table_name text, trigger_name text);
insert into slony_triggers (schema_name, table_name, trigger_name)
  select r.object_schema, st.table_name, r.object_name
  from data_raw r, slony_triggered_tables st
  where
   r.object_schema = st.schema_name and
   r.object_type = 'trigger' and
   (
    (r.object_name like st.table_name || '_%_logtrigger/\"INSERT\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER') or
    (r.object_name like st.table_name || '_%_logtrigger/\"UPDATE\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER') or
    (r.object_name like st.table_name || '_%_logtrigger/\"DELETE\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.logtrigger(%),orientation:ROW,timing:AFTER') or
    (r.object_name like st.table_name || '_%_denyaccess/\"INSERT\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE') or
    (r.object_name like st.table_name || '_%_denyaccess/\"UPDATE\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE') or
    (r.object_name like st.table_name || '_%_denyaccess/\"DELETE\"' and
     r.object_definition like 'action:EXECUTE PROCEDURE _%.denyaccess(%),orientation:ROW,timing:BEFORE'))
   group by 1, 2, 3;


do \$\$
begin
   if ('${PGCMPIGNORESLONYTRIGGERS}' = 'true') then
      raise notice 'Adding expected differences for Slony Triggers';
      insert into expected_differences (object_type, object_schema, object_name, difference_type, difference_cause, importance)
         select 'trigger', schema_name, trigger_name, 'missing in 2nd DB', 'Slony-I Replication', 1
         from slony_triggers group by 1, 2, 3, 4, 5, 6;
   end if;
end
\$\$ language plpgsql;

insert into unexplained_items (object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, importance)
select object_name, object_schema, object_type, label1, object_definition1, label2, object_definition2, difference_type, importance
from raw_differences r
where r.difference_type not in ('match') and
not exists 
(select 1 from expected_differences d where r.object_name = d.object_name and r.object_type = d.object_type);


\\copy fulloutput to '${PGCFULLOUTPUT}';
\\copy unexplained_items to '${PGCUNEXPLAINED}';

drop table if exists badexplanations_items;
create table badexplanations_items as
select object_type, object_schema, object_name, difference_type, 'Not Yet Properly Explained'::text as difference_cause, importance
from unexplained_items
union all
select object_type, object_schema, object_name, difference_type, 'Still Not Properly Explained'::text as difference_cause, importance
from expected_differences where difference_cause in ('Not Properly Yet Explained', 'Still Not Properly Explained')
union all
select object_type, object_schema, object_name, difference_type, 'Misexplained', importance
from expected_differences d 
  where exists (select 1 from raw_differences r where r.object_type = d.object_type and r.object_name = d.object_name and (r.difference_type <> 'match' and r.difference_type <> d.difference_type));

\echo Results Summary
select object_type, difference_type, difference_cause, importance, count(*) from fulloutput group by 2, 1, 3, 4 order by 4, 2, 1, 3;

\\copy badexplanations_items to '${PGCBADEXPLAIN}';

\echo Inadequately Explained Items
select object_type, object_name, difference_type, difference_cause, importance from badexplanations_items order by object_type, object_name;

"
    echo "${COMPARESCRIPT}" > ${PGCWORKDIR}/load-comparison-data.sql

    ${PSQL} -d "${PGURI}" -f ${PGCWORKDIR}/load-comparison-data.sql > ${PGCWORKDIR}/perform-comparison.log 2>&1 
    retcode=$?
    if [ $retcode -ne 0 ]; then
	echo "Could not run comparison against database PGDATABASE=${PGURI}"
	echo "See comparison log: ${PGCWORKDIR}/perform-comparison.log"
	exit 2
    fi

    if [[ -s ${PGCBADEXPLAIN} ]] ; then
	baditemcount=`wc -l ${PGCBADEXPLAIN}`
	echo "Number of items inadequately explained: ${baditemcount}"
	exit 3
    fi
}

compare_data
echo "pgcmp complete - no differences found"
